# ============================== SETUP ===============================
# Start from an empty global environment and a fixed RNG seed so that any
# random draws made later in the script are reproducible across runs.
rm(list = ls())
set.seed(1)
# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 999)

# Every path used below is relative to the replication `codes` folder.
# The original author's Dropbox location is used when it exists; on any
# other machine the script keeps the current working directory instead of
# aborting, so it can be launched directly from the `codes` folder.
codes_dir <- path.expand(
  "~/Dropbox/Wayne-Ying/White_Nationalist_Recruitment/replication/codes"
)
if (dir.exists(codes_dir)) {
  setwd(codes_dir)
} else {
  message("Directory '", codes_dir, "' not found; using working directory ",
          getwd())
}
library(tidyverse)
library(data.table)
library(dplyr)
library(vars)

# ========================= DATA WRANGLING ==========================

# Post-level dictionary columns that are dichotomised before aggregation.
topic_dicts <- c("race_dict", "gender_dict", "nationalism_dict",
                 "partisan_dict", "religion_dict")

# Build the daily, wide-format topic shares for one platform.
#
# Steps (identical for tweets and gabs):
#   1. keep posts with RelevantLogit == 1 and ads_dict == 0;
#   2. keep accounts with seedfollow >= min_seedfollow (robustness check:
#      followers following 7 or more leaders; leaders are coded 999 and
#      therefore always pass);
#   3. recode each topic dictionary column to 1 when its value is >= 2 and
#      to 0 otherwise;
#   4. average the indicators by calendar day and group (leader/follower);
#   5. spread the two groups into columns named <Topic>_<group>.
#
# @param path           CSV file with one row per post.
# @param platform_id    Value stored in the `platform` column of the result.
# @param min_seedfollow Minimum `seedfollow` value a post's author needs.
# @return A tibble with one row per date, sorted by date, holding the ten
#   <Topic>_<group> shares (when both groups are present) and `platform`.
daily_topic_shares <- function(path, platform_id, min_seedfollow = 7) {
  posts <- as_tibble(fread(path, stringsAsFactors = FALSE))

  posts %>%
    filter(RelevantLogit == 1, ads_dict == 0,
           seedfollow >= min_seedfollow) %>%
    mutate(
      across(all_of(topic_dicts), function(score) ifelse(score >= 2, 1, 0)),
      date = as.Date(created_at),
      group = ifelse(seedfollow == 999, "leader", "follower")
    ) %>%
    group_by(date, group) %>%
    # .groups = "drop" returns an ungrouped tibble, so no stale grouping is
    # carried into the reshaping and binding steps below.
    summarize(Religion = mean(religion_dict, na.rm = TRUE),
              Race = mean(race_dict, na.rm = TRUE),
              Nationalism = mean(nationalism_dict, na.rm = TRUE),
              Partisan = mean(partisan_dict, na.rm = TRUE),
              Gender = mean(gender_dict, na.rm = TRUE),
              .groups = "drop") %>%
    pivot_wider(names_from = group,
                values_from = c(Religion, Race, Nationalism, Partisan,
                                Gender)) %>%
    arrange(date) %>%
    mutate(platform = platform_id)
}

# ---- tweets ----
tweets <- daily_topic_shares("../datasets/input/tweets.csv", platform_id = 1)

# ---- gabs ----
gabs <- daily_topic_shares("../datasets/input/gabs.csv", platform_id = 0)

# ---- combine ----
# Stack both platforms (gabs first) and sort by date; order() is stable, so
# within a date the gab row stays ahead of the tweet row.
comb <- rbind(gabs, tweets)
comb <- comb[order(comb$date), ]
comb <- comb[comb$date >= as.Date("2016-08-15") &
               comb$date <= as.Date("2021-05-15"), ]
# Drop every platform-day with a missing value in any column (e.g. a day on
# which one of the two groups has no posts).
comb <- na.omit(comb)
colnames(comb)
# Fix the column order: the VAR section addresses columns of this matrix.
comb <- comb[, c("date",
                 "Religion_follower", "Religion_leader",
                 "Race_follower", "Race_leader",
                 "Nationalism_follower", "Nationalism_leader",
                 "Partisan_follower", "Partisan_leader",
                 "Gender_follower", "Gender_leader",
                 "platform")]
# ts() turns the table into a numeric time-series matrix.
comb <- ts(comb)
rm(list = c("gabs", "tweets"))

# ============================ VAR MODELS ===========================

# Lag order of the VAR.
p <- 2

# Endogenous series are all columns of `comb` other than the date index and
# the platform indicator; the indicator enters as the exogenous regressor.
endogenous_cols <- setdiff(colnames(comb), c("date", "platform"))

var_model_merged <- VAR(
  y = comb[, endogenous_cols],
  p = p,
  exogen = comb[, "platform"]
)

# Cumulative impulse responses up to 20 steps ahead.
var_irfs_cum_merged <- irf(
  var_model_merged,
  n.ahead = 20,
  cumulative = TRUE
)

# ======================= EXTRACT IRF RESULTS =======================
var_irfs <- var_irfs_cum_merged
variables <- names(var_irfs$irf)
elements_to_pull <- c("irf", "Upper", "Lower")
# Short labels for the three pieces of the irf object: point estimate and
# upper/lower confidence bounds.
element_labels <- c(irf = "pe", Upper = "upr", Lower = "lwr")

# Reshape one matrix of an irf object into long format.
#
# @param irf_obj Object returned by irf().
# @param element Which component to read: "irf", "Upper" or "Lower".
# @param series  Name of the list entry inside that component.
# @param label   Value written to the `e_type` column.
# @return Long table with columns day, cov, value, out, e_type. `day` is the
#   1-based row index of the matrix, `cov` the matrix column name and `out`
#   the list-entry name.
# NOTE(review): in the vars package the list entries of irf()$irf are named
# after the impulse variable and the matrix columns after the responses, so
# `out` would be the shocked series and `cov` the responding one - confirm
# this matches how the figure is meant to be read.
tidy_irf_paths <- function(irf_obj, element, series, label) {
  paths <- as.data.frame(irf_obj[[element]][[series]])
  # Attach the row index before reshaping so every value keeps its own day.
  paths$day <- seq_len(nrow(paths))
  paths %>%
    pivot_longer(cols = -day, names_to = "cov", values_to = "value") %>%
    mutate(out = series, e_type = label)
}

# Collect every element/series combination in a list and bind once, rather
# than growing a data frame with rbind() inside nested loops.
irf_data <- bind_rows(lapply(elements_to_pull, function(element) {
  bind_rows(lapply(variables, function(series) {
    tidy_irf_paths(var_irfs, element, series,
                   label = element_labels[[element]])
  }))
}))

# Rescale the responses (x10 here; the plotting code rescales again) and put
# the point estimate and both bounds side by side: one row per
# cov / out / day with columns pe, upr and lwr.
irf_data_wide <- irf_data %>%
  mutate(value = (value / 10) * 100) %>%
  pivot_wider(names_from = e_type, values_from = value)

# ========================== PRODUCE FIGURE =========================
final_input <- irf_data_wide

# Display label for every series, listed in the order the facets appear.
series_labels <- c(
  "Race_follower"        = "Race\n(Followers)",
  "Race_leader"          = "Race\n(Leaders)",
  "Nationalism_follower" = "Nationalism\n(Followers)",
  "Nationalism_leader"   = "Nationalism\n(Leaders)",
  "Gender_follower"      = "Gender\n(Followers)",
  "Gender_leader"        = "Gender\n(Leaders)",
  "Partisan_follower"    = "Partisan\n(Followers)",
  "Partisan_leader"      = "Partisan\n(Leaders)",
  "Religion_follower"    = "Religion\n(Followers)",
  "Religion_leader"      = "Religion\n(Leaders)"
)
label_order <- unname(series_labels)

# data wrangling for figure: keep the rows whose day index equals 15,
# relabel both series columns and fix their ordering. `cov` uses the
# reversed order because the figure flips its axes.
# NOTE(review): `day` starts at 1 for the first row of the IRF table; if
# that row is the impact period, day 15 lies 14 steps after the shock -
# confirm this is the intended "15-day" horizon.
plot_db <- final_input %>%
  filter(day == 15)

plot_db <- plot_db %>%
  mutate(
    cov = factor(recode(cov, !!!series_labels), levels = rev(label_order)),
    out = factor(recode(out, !!!series_labels), levels = label_order)
  )

# Display labels of the leader and follower series.
leader_series <- c("Race\n(Leaders)",
                   "Nationalism\n(Leaders)",
                   "Gender\n(Leaders)",
                   "Partisan\n(Leaders)",
                   "Religion\n(Leaders)")
follower_series <- c("Race\n(Followers)",
                     "Nationalism\n(Followers)",
                     "Gender\n(Followers)",
                     "Partisan\n(Followers)",
                     "Religion\n(Followers)")

# Theme shared by both pages of the figure. The panel border is drawn on top
# of the panel, so its fill must be NA (no fill) to leave the data visible.
irf_figure_theme <- theme(
  legend.position = "bottom",
  panel.background = element_blank(),
  panel.grid.major = element_line(colour = "gray90", linetype = "solid"),
  axis.text.x = element_text(size = 16),
  axis.text.y = element_text(size = 16),
  strip.text = element_text(size = 16),
  panel.border = element_rect(colour = "black", fill = NA),
  strip.background = element_rect(colour = "black"),
  axis.title = element_text(size = 14),
  legend.text = element_text(size = 14, margin = margin(t = 20), vjust = 5)
)

# Draw one page: a point estimate with its interval for every `cov` series,
# one facet per `out` series.
#
# @param panel_db Rows of `plot_db` to draw; needs the columns cov, out, pe,
#   lwr, upr and point_colour ("black" or "red").
# @return A ggplot object (not yet printed).
draw_irf_page <- function(panel_db) {
  # Second x10 rescaling; together with the one applied when the IRFs were
  # extracted the raw responses end up multiplied by 100.
  scaled_db <- panel_db %>%
    mutate(pe = (pe * 100)/10, lwr = (lwr * 100)/10, upr = (upr * 100)/10)

  ggplot(scaled_db, aes(x = cov, y = pe, ymin = lwr, ymax = upr)) +
    geom_segment(aes(x = cov, xend = cov, y = lwr, yend = upr,
                     color = point_colour),
                 linewidth = 1) +
    geom_point(size = 3, aes(color = point_colour)) +
    geom_hline(yintercept = 0, color = "red4") +
    facet_wrap(~out, nrow = 1) +
    coord_flip() +
    xlab("") +
    ylab("\n15-day responses (in percentage points)") +
    # Only the black key is listed in the legend; red marks are drawn but
    # not explained there.
    scale_color_manual(name = "",
                       values = c("black" = "black", "red" = "red"),
                       labels = c("black" = "point estimate with 95% confidence interval"),
                       breaks = "black") +
    irf_figure_theme
}

# Page 1: leader series on the axis (`cov`), follower series as facets
# (`out`). Two cov/out pairs are drawn in red.
# NOTE(review): presumably these are the estimates singled out in the text -
# confirm.
page_leader_cov <- plot_db %>%
  filter(cov %in% leader_series, out %in% follower_series) %>%
  mutate(point_colour = ifelse(
    (cov == "Gender\n(Leaders)" & out == "Race\n(Followers)") |
      (cov == "Partisan\n(Leaders)" & out == "Nationalism\n(Followers)"),
    "red", "black"
  ))

# Page 2: follower series on the axis, leader series as facets; all black.
page_follower_cov <- plot_db %>%
  filter(out %in% leader_series, cov %in% follower_series) %>%
  mutate(point_colour = "black")

pdf("../plots/FigureA13.pdf", width = 12, height = 7)
# print() is required: ggplot objects are only drawn automatically when
# evaluated at the interactive top level, so without it a source()d run
# would leave the PDF without pages.
print(draw_irf_page(page_leader_cov))
print(draw_irf_page(page_follower_cov))
dev.off()